The World of Computer Software

home *** CD-ROM | disk | FTP | other *** search

/ The World of Computer Software / The World of Computer Software.iso / tags18.zip / ASMTAG.E < prev next >

Wrap

Text File | 1992-03-29 | 26KB | 776 lines

/* EPSHeader
   File: asmtag.c
   Author: J. Kercheval
   Created: Sun, 07/14/1991  17:25:26
*/
/* EPSRevision History
   J. Kercheval  Sun, 07/14/1991  20:25:59  creation
   J. Kercheval  Mon, 07/15/1991  22:47:30  finish finite state machine parser
   J. Kercheval  Wed, 07/17/1991  21:35:43  add IsMember() and get_token()
   J. Kercheval  Thu, 07/18/1991  19:57:34  add flags checking
   J. Kercheval  Sun, 07/21/1991  15:58:56  add comment block support
   J. Kercheval  Sat, 07/27/1991  21:16:53  remove public post process support
   J. Kercheval  Sat, 07/27/1991  22:50:49  performance considerations (+10%)
   J. Kercheval  Sat, 08/10/1991  18:14:46  Speed up IsMember()
   J. Kercheval  Fri, 09/13/1991  01:17:05  add when_loading() to remap def_srch_case_map[]
   J. Kercheval  Thu, 10/03/1991  12:27:37  fix logic outputting local labels
   J. Kercheval  Sat, 10/05/1991  14:06:33  add ASMTagWant defines
*/

/*
 * This file implements tagging for .ASM and .INC files which contain 80x86
 * assembler in the MASM/TASM syntax.  This file defines no new commands and
 * is intended to work with the tags package included with V5.0 of Epsilon.
 * There is no problem using modified tags packages providing calls are made
 * to tag_suffix_???() routines in the same way Epsilon does this and that
 * an output routine add_tag() is used.  All that should be required is to
 * compile and load this file and this module will be used transparently to
 * you.
 *
 * This module implements tagging for UNION, STRUC, MACRO, PROC, LABEL
 * keywords as well as for implicit labels (label:) and for data definitions
 * (ie. equ, =, dq, dw, db, etc....).  The performance cost on a per tag
 * basis is negligible, but since more tagging is done, you should expect a
 * practical 10%-20% performance hit on a per file basis.  This tagger is not
 * intended to do all of your work for you but is designed to be used in
 * conjunction with the tags generator I have developed and is now available.
 * This file implements the same semantic parser as is found in that
 * executable.  Use the executable in your make file for very fast and
 * updated tags.  If you have problems finding it, contact me and I can point
 * the way...
 *
 * There is defined at the end of this module a when_loading() function which
 * alters the default search case map to allow *correct* (or at least
 * consistent) sorting with sort routines external to Epsilon.  In particular,
 * to produce the same sort order as any UNIX, VMS or HP style sort or with
 * the tags generator this module is supposed to coexist with, this mapping
 * must be done.  You should see no difference in the location of sorted
 * buffers except for lines starting with ^, [, \, ] and _.  Note that the
 * function is wrapped in "#ifdef foo" below, so it is only compiled when
 * the symbol foo is defined.
 *
 * This code is dedicated to the public domain with the caveat that Lugaru is
 * welcome to use this within their distribution source code which is
 * supplied with Epsilon.
 *
 * Good Tagging,
 *
 *      jbk@wrq.com
 *
 *      John Kercheval
 *      127 NW Bowdoin Pl #105
 *      Seattle, WA  98107-4960
 *      August 10, 1991
 */

#include <eel.h>

#ifndef BOOLEAN
#define BOOLEAN int
#define TRUE 1
#define FALSE 0
#endif

/* This is a list of the types of tokens you may want to tag.  Set them true
 * if you want that particular type of tag. */
#define ASMTagWantProc   TRUE
#define ASMTagWantMacro  TRUE
#define ASMTagWantLabel  TRUE
#define ASMTagWantStruc  TRUE
#define ASMTagWantUnion  TRUE
#define ASMTagWantDefine TRUE

/*
 * The finite state machine allows the following interesting paths
 *
 *   1 - Discard, Parse1, Symbol1
 *   2 - Discard, Parse1, Parse2, Symbol2
 *   3 - Discard, Parse1, Parse2, Define
 *
 * all the important cases follow one of these paths according to MASM/TASM
 * syntax.  The exit state is for finish up routine calls and some paths not
 * covered here are simple error paths and probably result from syntax errors
 *
 * enum state { Discard, Parse1, Parse2, Symbol1, Symbol2, Define, Exit };
 */

/*
 * emulate an enumerated type for the state machine
 */
#define Discard 0
#define Parse1  1
#define Parse2  2
#define Symbol1 3
#define Symbol2 4
#define Define  5
#define Exit    6

typedef int State;

#define COMMENT_CHAR ';'
#define SYMBOL_SIZE 15

/* size, including the terminating null, of the line and token buffers used
 * by ASMTags(); getline() never stores more than this many bytes */
#define ASM_LINE_SIZE 256

/* number of buffer characters consumed by the most recent getline() call,
 * which may exceed the number of characters actually stored in the line
 * buffer when a line is longer than ASM_LINE_SIZE - 1 */
int _ASM_line_span;


/*----------------------------------------------------------------------------
 *
 * The symbol lists represent all the symbols we are interested in either
 * obtaining or ignoring.  The first element of each of these symbol lists is
 * a string containing all the first characters within the symbol list.  This
 * allows faster rejection for ASMIsMember() which is called often.  Each
 * list is terminated by an empty string.
 *
 ---------------------------------------------------------------------------*/

/* symbols which are not significant for this parser */
char ASM_NOP_Sym[][SYMBOL_SIZE] =
{
    "cpbfnwo",                  /* list of starting characters of symbols
                                 * below */
    "c",                        /* C language declaration */
    "pascal",                   /* PASCAL language declaration */
    "basic",                    /* BASIC language declaration */
    "fortran",                  /* FORTRAN language declaration */
    "prolog",                   /* PROLOG language declaration */
    "nolanguage",               /* generic language declaration */
    "windows",                  /* WINDOWS exit and entry modifier */
    "oddnear",                  /* overlay modifier */
    "oddfar",                   /* overlay modifier */
    "normal",                   /* normal procedure entry/exit code */
    "\0"
};

/* symbols which begin a comment block */
char ASM_comment_block[][SYMBOL_SIZE] =
{
    "c",                        /* list of starting characters of symbols
                                 * below */
    "comment",                  /* begin comment block, next character is
                                 * delimiter */
    "\0"
};

/* create the function for determining if a character is a delimiter */
#define IsDelim(c) ( _ASM_delim_table[c] )

/* the indexed table for delimiter character lookup */
BOOLEAN _ASM_delim_table[256];

/* valid delimiters for this syntax */
char ASM_delim[] = " \t\n;:=.,\"()<>[]*-+/";

/* create the function for determining if a character is a whitespace */
#define IsWhite(c) ( _ASM_white_table[c] )

/* the indexed table for white space character lookup */
BOOLEAN _ASM_white_table[256];

/* whitespace characters */
char ASM_white[] = " \t\v\f";

/* symbols which both are delimiters and a special token, these are special
   tokens only when found at the beginning of a string of 1 or more
   delimiters */
char ASM_delim_Sym[] = "=:";

/* symbols which fit into the Define state and represent a tagged symbol */
/* state Define depends on the token ":" being at index 1 in this list */
char ASM_def[][SYMBOL_SIZE] =
{
    ":e=cd",                    /* list of starting characters of symbols
                                 * below */
    ":",                        /* local labels */
    "equ",                      /* equivalence */
    "=",                        /* equivalence */
    "catstr",                   /* concatenated and named strings */
    "db",                       /* named byte data definition */
    "dw",                       /* named word data definition */
    "dd",                       /* named double word data definition */
    "dp",                       /* named 6 byte far pointer data area
                                 * definition */
    "df",                       /* named 6 byte far pointer definition */
    "dq",                       /* named quad word data definition */
    "dt",                       /* named 10 byte data area */
    "\0"
};

/* symbols which fit into the Symbol state and represent a tagged symbol */
/* ASMSymbolWanted() depends on the order of the entries in this list */
char ASM_sym[][SYMBOL_SIZE] =
{
    "pmlsu",                    /* list of starting character of symbols
                                 * below */
    "proc",                     /* procedures */
    "macro",                    /* macros */
    "label",                    /* local labels */
    "struc",                    /* structures */
    "union",                    /* unions */
    "\0"
};


/*----------------------------------------------------------------------------
 *
 * ASMParserInit() initializes the tables required by the parser.  The tables
 * used are simple boolean indexes which are true if the character
 * corresponding to the index is a member of the associated set (ASM_delim
 * or ASM_white).  The null character is additionally marked as a delimiter
 * so that token scanning always stops at the end of a line buffer.
 *
 ---------------------------------------------------------------------------*/
ASMParserInit()
{
    char *s;
    int i;

    /* init the entire block to FALSE */
    for (i = 0; i < 256; i++) {
        _ASM_delim_table[i] = FALSE;
        _ASM_white_table[i] = FALSE;
    }

    /* set the characters in the delim set to TRUE */
    for (s = ASM_delim; *s; s++) {
        _ASM_delim_table[*s] = TRUE;
    }

    /* NULL is also a delimiter */
    _ASM_delim_table['\0'] = TRUE;

    /* set the characters in the white set to TRUE */
    for (s = ASM_white; *s; s++) {
        _ASM_white_table[*s] = TRUE;
    }
}


/*----------------------------------------------------------------------------
 *
 * strchr() is the standard string library function strchr(): it returns a
 * pointer to the first occurrence of c in s, or NULL if c does not occur.
 * As with the standard function, searching for '\0' returns a pointer to
 * the terminator of s.
 *
 ---------------------------------------------------------------------------*/
char *strchr(s, c)
    char *s;
    char c;
{
    char *ret = s;

    while (*ret) {
        if (*ret == c)
            return ret;
        ret++;
    }

    /* only reached at the terminator: matches when c is '\0' */
    if (*ret == c)
        return ret;

    return NULL;
}


/*----------------------------------------------------------------------------
 *
 * ASMSymbolWanted() returns true if the index into the sym token list is one
 * of the wanted symbols according to the ASMTagWant defines.  The indexes
 * belong with the following ASMTagWant defines:
 *
 *      Flag                    Symbol          Index
 *      ---------------         -------         -----
 *      ASMTagWantProc          "proc"          1
 *      ASMTagWantMacro         "macro"         2
 *      ASMTagWantLabel         "label"         3
 *      ASMTagWantStruc         "struc"         4
 *      ASMTagWantUnion         "union"         5
 *
 * Any other index yields FALSE.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASMSymbolWanted(index)
    int index;
{
    /* return true if the associated flag is true */
    switch (index) {
        case 1:
            return ASMTagWantProc;
        case 2:
            return ASMTagWantMacro;
        case 3:
            return ASMTagWantLabel;
        case 4:
            return ASMTagWantStruc;
        case 5:
            return ASMTagWantUnion;
        default:
            return FALSE;
    }
}


/*----------------------------------------------------------------------------
 *
 * ASMIsMember() takes the token passed and checks for membership in the
 * empty-string terminated array, token_list, and returns TRUE if a member
 * and FALSE otherwise.  The comparison is not case sensitive.  When the
 * return value is TRUE, *index is the index into the token list of the
 * matching symbol; when it is FALSE the value left in *index is not
 * meaningful.
 *
 * case_fold is forced on for the duration of the comparison and is always
 * put back to the caller's value before returning, including on the fast
 * rejection path.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASMIsMember(token_list, token, index)
    char token_list[][SYMBOL_SIZE];
    char *token;
    int *index;
{
    int old_case_fold = case_fold;
    BOOLEAN found = FALSE;

    /* use non case sensitive string compare */
    case_fold = 1;

    /* look for dirty rejection: entry 0 of the list holds every first
     * character used by the symbols that follow it */
    if (strchr(token_list[0], tolower(token[0]))) {

        /* march through array until membership is determined */
        for (*index = 1; *token_list[*index]; (*index)++) {

            /* stop as soon as the token is found */
            if (!strfcmp(token, token_list[*index])) {
                found = TRUE;
                break;
            }
        }
    }

    /* single exit so the caller's case_fold setting is never lost */
    case_fold = old_case_fold;
    return found;
}


/*----------------------------------------------------------------------------
 *
 * ASM_get_token() will obtain the next token in the line pointed to by *lptr
 * and advance *lptr past it.  It returns FALSE if the end of the line is
 * reached or a comment character is the first non whitespace character
 * found, and TRUE otherwise with the token copied into token.  Tokens which
 * are members of ASM_NOP_Sym are skipped.
 *
 * A character from ASM_delim_Sym ("=" or ":") is returned as a one
 * character token.  If the scan position rests on any other delimiter the
 * token returned is the empty string and *lptr is not advanced.
 *
 * token must be large enough to hold the longest run of non delimiter
 * characters in the line plus a terminating null.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASM_get_token(lptr, token)
    char **lptr;
    char *token;
{
    char *s;                    /* start location in string */
    int token_length;           /* the length of the current token */
    int dummy;                  /* receives the unused NOP symbol index */

    /* loop until we have a valid token or end of string */
    do {

        /* move past whitespace */
        while (IsWhite(**lptr)) {
            (*lptr)++;
        }

        /* return false if end of line */
        if (!**lptr)
            return FALSE;

        /* check if comment */
        if (**lptr == COMMENT_CHAR) {
            return FALSE;
        }

        /* check for delimiter token */
        if (strchr(ASM_delim_Sym, **lptr)) {
            token[0] = **lptr;
            token[1] = '\0';
            (*lptr)++;
        }
        else {

            /* save the beginning location */
            s = *lptr;

            /* move to the next delimiter in the line */
            while (!IsDelim(**lptr)) {
                (*lptr)++;
            }

            /* get the token */
            token_length = *lptr - s;
            strncpy(token, s, token_length);
            token[token_length] = '\0';
        }
    } while (ASMIsMember(ASM_NOP_Sym, token, &dummy));

    return TRUE;
}


/*----------------------------------------------------------------------------
 *
 * getline() obtains the next line in the buffer named inbuf, starting at
 * that buffer's point and advancing point with nl_forward().  It returns
 * FALSE, leaving line untouched, when point does not move (nothing left to
 * read) and TRUE otherwise.
 *
 * line must have room for ASM_LINE_SIZE bytes.  At most ASM_LINE_SIZE - 1
 * characters are copied so an over long line cannot overrun the caller's
 * buffer; the remainder of such a line is skipped.  _ASM_line_span is set
 * to the number of buffer characters actually consumed so callers can keep
 * an accurate file offset even when the copy was truncated.
 *
 * The current buffer is always restored before returning.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN getline(inbuf, line)
    char *inbuf;
    char *line;
{
    char *oldbuf = bufname;
    int start;                  /* position of the start of the line */
    int end;                    /* position just past the text to copy */
    BOOLEAN got_line;

    /* switch buffers first so that point refers to inbuf */
    bufname = inbuf;
    start = point;

    nl_forward();
    end = point;

    _ASM_line_span = end - start;
    got_line = (end != start);

    if (got_line) {

        /* clamp the copy to the size of the caller's line buffer */
        if (_ASM_line_span > ASM_LINE_SIZE - 1) {
            end = start + ASM_LINE_SIZE - 1;
        }
        grab(start, end, line);
    }

    bufname = oldbuf;
    return got_line;
}


/*----------------------------------------------------------------------------
 *
 * output_tag() places the tag in the correct format into the output buffer
 * by a call to add_tag().  Only symbol and char_number are used at present;
 * the remaining parameters are accepted for the benefit of other output
 * formats.
 *
 ---------------------------------------------------------------------------*/
output_tag(outbuf, line, symbol, infname, line_number, char_number)
    char *outbuf;
    char *line;
    char *symbol;
    char *infname;
    int line_number;
    int char_number;
{
    /* this is just a shell call to add_tag() defined in tags.e but is an
     * ideal place to add code for other output formats or extra output
     * information etc. */
    add_tag(symbol, char_number);

    return;
}


/*----------------------------------------------------------------------------
 *
 * ASMTags() tags an input stream assuming input format of ASM 80x86 format
 * in MASM/TASM syntax.  Lines are read from the buffer inbuf starting at
 * that buffer's point and each wanted symbol is reported through
 * output_tag() along with its character offset, counted from the position
 * at which reading started.  The current buffer, point and mark are
 * restored before returning.
 *
 ---------------------------------------------------------------------------*/
ASMTags(inbuf, infname, outbuf)
    char *inbuf;
    char *infname;
    char *outbuf;
{
    State state;                /* the current state of the parser */

    char line[ASM_LINE_SIZE];   /* the current input line */
    char cur_token[ASM_LINE_SIZE];      /* the current token */
    char prev_token[ASM_LINE_SIZE];     /* the previous token */

    char *lptr;                 /* pointer into line for token parser */
    char *prev_lptr;            /* pointer into line for previous token */

    int line_number;            /* the current line in the file */
    int line_length;            /* the length of the current line */
    int char_number;            /* the current character in the file */

    int symbol_index;           /* the index into the token list of the
                                 * symbol */

    char *oldbuf = bufname;
    spot oldpoint = alloc_spot();
    spot oldmark = alloc_spot();

    /* save current buffer state */
    *oldpoint = point;
    *oldmark = mark;

    /* init the engine */
    ASMParserInit();
    cur_token[0] = '\0';
    prev_token[0] = '\0';
    state = Discard;
    line_number = 0;
    line_length = 0;
    char_number = 0;
    lptr = prev_lptr = (char *) NULL;

    for (;;) {

        switch (state) {

            case Discard:       /* current line is not valid */

                /* if EOF then return */
                if (getline(inbuf, line)) {
                    lptr = line;

                    /* increment counters */
                    line_number++;

                    /* char_number increments by length of previous line */
                    char_number += line_length;

                    /* line length, as consumed from the buffer rather
                     * than as stored, so offsets survive truncation */
                    line_length = _ASM_line_span;

                    state = Parse1;
                }
                else {
                    state = Exit;
                }
                break;

            case Parse1:        /* parsing for first *special* token */

                /* get the next valid token */
                if (!ASM_get_token(&lptr, cur_token)) {

                    /* if no token left or a comment as first non white space
                     * char in remainder of line */
                    state = Discard;
                }
                else {

                    /* move the cur_token to prev_token */
                    strcpy(prev_token, cur_token);

                    /* check for membership in the tagging symbol club */
                    if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {
                        state = Symbol1;
                    }
                    else {

                        /* check if comment block */
                        if (ASMIsMember(ASM_comment_block, cur_token,
                                        &symbol_index)) {

                            /* get the next non white character, this makes
                             * the assumption that the delimiter character is
                             * on the same line as the comment symbol.  If the
                             * delimiter character is not on the current line
                             * then parsing continues normally on the next
                             * line. */
                            while (IsWhite(*lptr)) {
                                lptr++;
                            }

                            if (*lptr) {

                                /* this is the delimiter character, store it
                                 * and move lptr past it */
                                *cur_token = *lptr;
                                lptr++;

                                /* move over comment block, remembering to
                                 * update line info as we go */
                                while (*lptr != *cur_token) {

                                    /* get a new line if end of line */
                                    if (!*lptr) {
                                        if (!getline(inbuf, line)) {

                                            /* out of input: make the loop
                                             * condition fail */
                                            *cur_token = *lptr;
                                        }
                                        else {
                                            lptr = line;

                                            /* increment counters */
                                            line_number++;

                                            /* char_number increments by
                                             * length of previous line */
                                            char_number += line_length;

                                            /* line length */
                                            line_length = _ASM_line_span;
                                        }
                                    }
                                    else {
                                        lptr++;
                                    }
                                }
                            }

                            /* the rest of the closing line is ignored */
                            state = Discard;
                        }
                        else {

                            /* nothing special, parse the next symbol */
                            state = Parse2;
                        }
                    }
                }
                break;

            case Parse2:        /* parsing for second *special* token */

                /* save the previous position */
                prev_lptr = lptr;

                /* get the next token */
                if (!ASM_get_token(&lptr, cur_token)) {

                    /* no token left, reset machine */
                    state = Discard;
                }
                else {
                    if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {

                        /* found a major symbol */
                        state = Symbol2;
                    }
                    else {
                        if (ASMIsMember(ASM_def, cur_token, &symbol_index)) {

                            /* found a defining token */
                            state = Define;
                        }
                        else {
                            state = Discard;
                        }
                    }
                }
                break;

            case Symbol1:       /* next token, ignore if no token found */

                /* get the next symbol and output it */
                if (ASM_get_token(&lptr, cur_token)) {
                    if (ASMSymbolWanted(symbol_index)) {
                        output_tag(outbuf, line, cur_token, infname,
                                   line_number,
                                   char_number + lptr - line -
                                   strlen(cur_token));
                    }
                }

                /* reset machine */
                state = Discard;
                break;

            case Symbol2:       /* previous token was the wanted symbol */

                /* the previous token is the symbol of interest */
                if (ASMSymbolWanted(symbol_index)) {
                    output_tag(outbuf, line, prev_token, infname,
                               line_number,
                               char_number + prev_lptr - line -
                               strlen(prev_token));
                }

                /* reset machine */
                state = Discard;
                break;

            case Define:        /* previous token was the wanted symbol */

                /* the previous token is the symbol of interest; index 1 of
                 * ASM_def is ":" which marks a label rather than a data
                 * definition */
                if ((ASMTagWantDefine && symbol_index != 1) ||
                    (ASMTagWantLabel && symbol_index == 1)) {
                    output_tag(outbuf, line, prev_token, infname,
                               line_number,
                               char_number + prev_lptr - line -
                               strlen(prev_token));
                }

                /* reset machine */
                state = Discard;
                break;

            case Exit:          /* clean it up */

                /* restore original location */
                bufname = oldbuf;
                point = *oldpoint;
                mark = *oldmark;
                free_spot(oldpoint);
                free_spot(oldmark);
                return;
                break;

            default:            /* not reached */
                break;
        }
    }
}


/*----------------------------------------------------------------------------
 *
 * tag_suffix_asm() and tag_suffix_inc() are recognized procedure names
 * to the tags package in Epsilon and will be called automatically when
 * tagging needs to happen for these extensions.  tag_suffix_asm() is a
 * replacement for the routine of the same name defined in tags.e and
 * tag_suffix_inc() is new.
 *
 ---------------------------------------------------------------------------*/
tag_suffix_asm()
{
    /* the third parameter, the output buffer name is not actually used by
     * anyone but is left here for a time when this information may be
     * needed.  The current algorithm is to let the function add_tag() decide
     * the buffer name to send the output to.  As a little more than
     * coincidence, the name used here is the same used in add_tag() defined
     * in tags.e */
    ASMTags(bufname, filename, "-tags");
}

tag_suffix_inc()
{
    tag_suffix_asm();
}


#ifdef foo

/* rebuild the default character maps; only compiled when foo is defined.
 *
 * The accented letters are given as numeric character codes rather than as
 * character literals so that the source does not depend on the encoding it
 * is stored in.  The codes are the IBM code page 437 values, which is the
 * character set the 131..153 and 160..163 ranges below correspond to. */
when_loading()
{
#define UCLC(up, low)   _def_char_class[low] = C_LOWER, \
                        _def_char_class[up] = C_UPPER, \
                        _def_srch_case_map[up] = low, \
                        _def_case_map[low] = up, \
                        _def_case_map[up] = low

    int i, j;

    /* start from identity maps */
    for (i = 0; i < 256; i++)
        _def_case_map[i] = _def_srch_case_map[i] = i;

    /* the plain alphabet */
    for (i = 'A', j = 'a'; i <= 'Z'; i++, j++)
        UCLC(i, j);

    /* accented lower case ranges */
    for (i = 131; i < 154; i++)
        _def_char_class[i] = C_LOWER;
    for (i = 160; i < 164; i++)
        _def_char_class[i] = C_LOWER;

    /* accented upper/lower case pairs */
    UCLC(128, 135);             /* C cedilla */
    UCLC(142, 132);             /* A diaeresis */
    UCLC(143, 134);             /* A ring */
    UCLC(144, 130);             /* E acute */
    UCLC(146, 145);             /* AE ligature */
    UCLC(153, 148);             /* O diaeresis */
    UCLC(154, 129);             /* U diaeresis */
    UCLC(165, 164);             /* N tilde */
}

#endif